#delimit;
clear all;
set more off;
set logtype text;
capture log close regressions_collapsed_IV;

** REPLACE FILE PATH WITH PATH TO RELEVANT REPLICATION FILES;
local fileloc = "~/KMS_REPLICATION";
pause on;
set maxvar 30000;
set matsize 10000;

log using `fileloc'/log_files/regressions_collapsed_IV.txt, replace name(regressions_collapsed_IV);

** Specify basic appearance of graphs using global;
global graph_options "graphregion(fcolor(white) color(white) icolor(white)) plotregion()";

** NOTE: do-file calls other do-files at various points, indicating both the path specified in the local `fileloc' and the data file to be called when using subgroup analysis;

** Run regression prep;
do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _all;

eststo clear;

***** VARIABLE LABELS;
label variable died "Infant Death";
label variable male "Male";
label variable black "Black";
label variable asian "Asian";
label variable hisp "Hispanic";
label variable other_race "Other Race";
label variable HS_grad "Mother is HS Grad";
label variable college_grad  "Mother is College Grad";
label variable twins  "Twins";
label variable trip_or_more "Triplets or More";
label variable age19_25 "Mother Age 19 - 25";
label variable age26_30 "Mother Age 26 - 30";
label variable age31_35 "Mother Age 31 - 35";
label variable age36up "Mother Age > 35";
label variable medicaid "Medicaid";
label variable care_first_tri "Care 1st Trimester";
label variable low_weight "Low Birth Weight";
label variable premature "Premature";
label variable weekly_pm10 "PM10";
label variable weekly_co "CO";
label variable weekly_oz "O3";


**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXX PROGRAMS, LOCALS, AND GLOBALS REFERENCED XXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Used to calculate effects of standard deviations after regressions and marginal effects comparable to prior estimates. See text;
cap program drop effects;
program define effects;
	estadd scalar deaths_compare_`2' = (_b[`1']/1000) * 52 * 100000;
	estadd scalar deaths_compare_`2'_w = ((_b[`1']/1000)*$`1'_within_sd) * 52 * 100000;
	estadd scalar deaths_compare_`2'_b = ((_b[`1']/1000)*$`1'_between_sd) * 52 * 100000;
end;

** Get standard deviations;
preserve;
bysort mother_zip merging_week: keep if _n == 1;
xtset mother_zip;
foreach var in weekly_co weekly_pm10 {;
	xtsum `var';
	global `var'_within_sd = r(sd_w);
	di $`var'_within_sd;
	global `var'_between_sd = r(sd_b);
	di $`var'_between_sd;
};
restore;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** TABLE 8; 
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Make coefficients easier to read by reducing leading zeros;
replace died = died * 1000;

** Note dividing to make coefficients and standard errors easier for Stata to calculate;
foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
** Global "weather_linear" defined by the use of "run regression prep" above;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
};

eststo clear;

egen maxtraffic = max(tot_flow_base), by(mother_zip);
drop if maxtraffic == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;


**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXX Main IV regressions XXXXXXXXXXXXXXXXXX:
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

**XXXXXXXXXXXXXX          CO        XXXXXXXXXXXXXXXXXXX;

xtset zip_rmonth;
xtivreg2 died (weekly_co = tot_flow_base tot_flowIV_*) $triweekly_co $weather1 $weather2 $controls $agespline _I* [fw = weight], nocollin ffirst fe cluster(mother_zip);
_eststo;

capture matrix drop F;
matrix F = e(first);
estadd scalar CO_APF = F[7,1]; 

effects "weekly_co" "CO";

**XXXXXXXXXXXXXX        PM10        XXXXXXXXXXXXXXXXXXX;

xtset zip_rmonth;
xtivreg2 died (weekly_pm10 = tot_flow_base tot_flowIV_*) $triweekly_pm10  $weather1 $weather2 $controls $agespline _I* [fw = weight], ffirst  nocollin fe cluster(mother_zip);
_eststo;

capture matrix drop F;
matrix F = e(first);
estadd scalar PM10_APF = F[7,1]; 

effects "weekly_pm10" "PM10";


**XXXXXXXXXXXXX          BOTH        XXXXXXXXXXXXXXXXXXX;

xtset zip_rmonth;
xtivreg2 died (weekly_co weekly_pm10 = tot_flow_base tot_flowIV_*) $triweekly_co $triweekly_pm10 $weather1 $weather2 $controls $agespline _I* [fw = weight], ffirst   nocollin fe cluster(mother_zip);
_eststo;


capture matrix drop F;
matrix F = e(first);
estadd scalar CO_APF = F[7,1]; 
estadd scalar PM10_APF = F[7,2]; 

effects "weekly_co" "CO";
effects "weekly_pm10" "PM10";


esttab using `fileloc'/regs/KMS_IV.xls, 
	replace
	tab
	keep(weekly_co weekly_pm10)
	title(IV Estimates and Expansions)		
	mlabels(, title)
	label
	cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
	starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01)
	stats(N CO_APF PM10_APF deaths_compare_CO  deaths_compare_PM10 deaths_compare_CO_w deaths_compare_PM10_w deaths_compare_CO_b deaths_compare_PM10_b, label(Obs. "CO" "PM10" "CO" "PM10" "CO" "PM10" "CO" "PM10" ) fmt(%9.0fc %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f));	

log close regressions_collapsed_IV;
